package com.tl.spark.scala

import org.apache.hadoop.hbase.{HBaseConfiguration, HConstants}
import org.apache.hadoop.hbase.client.Result
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.mapreduce.{TableInputFormat, TableOutputFormat}
import org.apache.hadoop.hbase.util.Bytes
import org.apache.spark.{SparkConf, SparkContext}

/**
  * @program: spark-test
  * @description:
  * @author: dong.tl
  * @create: 2018-12-19 10:57
  **/
object Count_PN {

  /**
    * Full-scans the HBase table CN_FUL_DB, groups rows by the `info:bib_pn_s`
    * column (patent number), keeps only groups with more than one row
    * (i.e. duplicated patent numbers) and writes them to HDFS as text.
    */
  def main(args: Array[String]): Unit = {
    val tableName = "CN_FUL_DB"

    // HBase configuration.
    val conf = HBaseConfiguration.create()
    // ZooKeeper quorum; could also come from hbase-site.xml on the classpath,
    // but setting it programmatically keeps the job self-contained.
    conf.set(HConstants.ZOOKEEPER_QUORUM, "db01,db02,db03")
    // ZooKeeper client port (2181 is the default).
    conf.set("hbase.zookeeper.property.clientPort", "2181")
    // Table to scan.
    conf.set(TableInputFormat.INPUT_TABLE, tableName)

    // Spark configuration: app name, master address and the job jar to ship.
    val sparkConf = new SparkConf()
      .setAppName("Count_PN")
      .setMaster("spark://192.168.173.247:7077")
      //.setMaster("local[*]")
      .setJars(Seq("F:\\IdeaProjects\\spark-test\\target\\spark-test-1.0-SNAPSHOT.jar"))

    val sc = new SparkContext(sparkConf)
    try {
      // Full scan of the HBase table as (rowkey, Result) pairs.
      val rdd = sc.newAPIHadoopRDD(
        conf,
        classOf[TableInputFormat],
        classOf[ImmutableBytesWritable],
        classOf[Result]
      )

      // Family / qualifier names encoded once with HBase's UTF-8 helper.
      // The original used String.getBytes(), which depends on the platform
      // default charset and re-encoded the strings for every record.
      val family = Bytes.toBytes("info")
      val qPn = Bytes.toBytes("bib_pn_s")
      val qPath = Bytes.toBytes("file_path")
      val qName = Bytes.toBytes("file_name")

      rdd.map { case (rowkeyWritable, result) =>
        val rowkey = Bytes.toString(rowkeyWritable.get())
        // NOTE(review): getValue returns null for missing cells, so these
        // strings may be null — preserved from the original behavior.
        val bibPnS = Bytes.toString(result.getValue(family, qPn))
        val filePath = Bytes.toString(result.getValue(family, qPath))
        val fileName = Bytes.toString(result.getValue(family, qName))
        (bibPnS, rowkey, filePath, fileName)
      }
        .groupBy(_._1)          // group rows sharing the same bib_pn_s
        .filter(_._2.size > 1)  // keep only duplicated patent numbers
        .saveAsTextFile("hdfs://db03:9000/tmp/Count_PN_2/")
    } finally {
      // Release cluster resources even if the job fails (missing in the
      // original: the SparkContext was never stopped).
      sc.stop()
    }
  }

}
